TensorScatterAdd

在输入张量的指定位置执行加法操作。根据给定的 indices 和 updates,在输入张量中相应的位置将更新值加到原值上,生成新的输出张量。

\[output[indices] = input[indices] + updates\]
输入:
  • input - 输入张量数据地址。

  • input_shape - 输入张量形状数组。

  • input_rank - 输入张量维度数。

  • indices - 指定更新位置的索引数组。

  • updates - 更新数据地址。

  • num_unit - 每个更新单元的长度。

  • index_depth - 索引深度(indices 的最后一维长度)。

  • output_unit_offsets(int*, 可选) - 单元偏移数组(仅私有存储版本使用)。

  • strides(int*, 可选) - 步长数组(仅私有存储版本使用)。

  • core_mask(int, 可选) - 核掩码(仅共享存储版本使用)。

输出:
  • output - 输出张量数据地址,存放更新后的结果。

支持平台:

FT78NE MT7004

备注:

  • FT78NE 支持 int8, int16, int32, fp32, fp64, cplx64, cplx128

  • MT7004 支持 fp16, fp32, int16, int32, cplx64

共享存储版本:

void fp_tensor_scatter_add_s(float *input, int *input_shape, int input_rank, int *indices, float *updates, float *output, int num_unit, int index_depth, int core_mask)
void i32_tensor_scatter_add_s(int32_t *input, int *input_shape, int input_rank, int *indices, int32_t *updates, int32_t *output, int num_unit, int index_depth, int core_mask)
void i16_tensor_scatter_add_s(int16_t *input, int *input_shape, int input_rank, int *indices, int16_t *updates, int16_t *output, int num_unit, int index_depth, int core_mask)
void i8_tensor_scatter_add_s(int8_t *input, int *input_shape, int input_rank, int *indices, int8_t *updates, int8_t *output, int num_unit, int index_depth, int core_mask)
void hp_tensor_scatter_add_s(half *input, int *input_shape, int input_rank, int *indices, half *updates, half *output, int num_unit, int index_depth, int core_mask)
void dp_tensor_scatter_add_s(double *input, int *input_shape, int input_rank, int *indices, double *updates, double *output, int num_unit, int index_depth, int core_mask)
void c64_tensor_scatter_add_s(float *input, int *input_shape, int input_rank, int *indices, float *updates, float *output, int num_unit, int index_depth, int core_mask)
void c128_tensor_scatter_add_s(double *input, int *input_shape, int input_rank, int *indices, double *updates, double *output, int num_unit, int index_depth, int core_mask)

C调用示例:

 1// FT78NE 示例
 2#include <stdio.h>
 3
 4int main(int argc, char* argv[]) {
 5    float *input = (float *)0xA0000000;
 6    float *output = (float *)0xA1000000;
 7    int input_shape[2] = {4, 4};
 8    int input_rank = 2;
 9    int indices[2] = {1, 2};
10    float updates[1] = {3.14};
11    int num_unit = 1;
12    int index_depth = 2;
13    int core_mask = 0xff;
14    fp_tensor_scatter_add_s(input, input_shape, input_rank, indices, updates, output, num_unit, index_depth, core_mask);
15    return 0;
16}

私有存储版本:

void fp_tensor_scatter_add_p(float *input, int *input_shape, int input_rank, int *indices, float *updates, float *output, int num_unit, int index_depth, int *output_unit_offsets, int *strides)
void i32_tensor_scatter_add_p(int32_t *input, int *input_shape, int input_rank, int *indices, int32_t *updates, int32_t *output, int num_unit, int index_depth, int *output_unit_offsets, int *strides)
void i16_tensor_scatter_add_p(int16_t *input, int *input_shape, int input_rank, int *indices, int16_t *updates, int16_t *output, int num_unit, int index_depth, int *output_unit_offsets, int *strides)
void i8_tensor_scatter_add_p(int8_t *input, int *input_shape, int input_rank, int *indices, int8_t *updates, int8_t *output, int num_unit, int index_depth, int *output_unit_offsets, int *strides)
void hp_tensor_scatter_add_p(half *input, int *input_shape, int input_rank, int *indices, half *updates, half *output, int num_unit, int index_depth, int *output_unit_offsets, int *strides)
void dp_tensor_scatter_add_p(double *input, int *input_shape, int input_rank, int *indices, double *updates, double *output, int num_unit, int index_depth, int *output_unit_offsets, int *strides)
void c64_tensor_scatter_add_p(float *input, int *input_shape, int input_rank, int *indices, float *updates, float *output, int num_unit, int index_depth, int *output_unit_offsets, int *strides)
void c128_tensor_scatter_add_p(double *input, int *input_shape, int input_rank, int *indices, double *updates, double *output, int num_unit, int index_depth, int *output_unit_offsets, int *strides)

C调用示例:

 1// FT78NE 示例
 2#include <stdio.h>
 3
 4int main(int argc, char* argv[]) {
 5    float *input = (float *)0x10000000;
 6    float *output = (float *)0x10010000;
 7    int *output_unit_offsets = (int *)0x10020000;
 8    int *strides = (int *)0x10030000;
 9    int input_shape[2] = {4, 4};
10    int input_rank = 2;
11    int indices[2] = {0, 1};
12    float updates[1] = {2.71};
13    int num_unit = 1;
14    int index_depth = 2;
15    fp_tensor_scatter_add_p(input, input_shape, input_rank, indices, updates, output, num_unit, index_depth, output_unit_offsets, strides);
16    return 0;
17}